# Clean MTurk Data
# Created: 01.03.2019

# Load the survey export; as.is = TRUE keeps text columns as character
# vectors instead of converting them to factors
x <- read.csv(file = ".../AH_POBE_MTurk.csv", as.is = TRUE)

table(x$polint, useNA = "always")

# Keep only respondents who finished the survey, identified by a non-missing
# answer to the political interest question (polint)

x <- x[complete.cases(x$polint), ]
table(x$polint, useNA = "always")

# Term used for design question (do you agree that ... is a government (social) program?)
# All tables in this section are frequency checks only; useNA = "always" adds
# a count of missing values even when there are none.

table(x$term, useNA = "always")

# Policy area

table(x$policy, useNA = "always")

# Main treatment: marginal counts, then the policy-by-treatment cross-tab

table(x$treat, useNA = "always")
table(x$policy, x$treat, useNA = "always")

# Split the combined treatment code in treat into the two factors of the
# 2 x 4 design. Any code other than the eight listed below is left as NA.

# Factor 1 (treatv): codes starting with "d" are direct, "i" are indirect
x$treatv <- ifelse(
  x$treat %in% c("dc", "dr", "dp", "da"), "direct",
  ifelse(x$treat %in% c("ic", "ir", "ip", "ia"), "indirect", NA)
)

table(x$treat, x$treatv, useNA = "always")

# Factor 2 (treatd): look each code up in a label vector aligned with the
# list of codes (two codes per label)
x$treatd <- rep(c("control", "reg", "pro", "all"), each = 2)[
  match(x$treat, c("dc", "ic", "dr", "ir", "dp", "ip", "da", "ia"))
]

table(x$treat, x$treatd, useNA = "always")

# Framing of the nutrition policy response (the job training policy response
# is the opposite by design). Frequency check only, missing values included.

table(x$resframe, useNA = "always")

# CHECK AND RECODE VARS (IF NEEDED)

# Condense the policy-specific r1 items (hm_r1, ui_r1) into one variable, then
# create r1_lean, a version that also includes the leaner follow-ups (r1l)

table(x$hm_r1, x$hm_r1l, useNA = "always")
table(x$ui_r1, x$ui_r1l, useNA = "always")

# Use the ui answer where there is one, otherwise the hm answer
x$r1 <- ifelse(!is.na(x$ui_r1), x$ui_r1, x$hm_r1)

table(x$hm_r1, x$r1, useNA = "always")
table(x$ui_r1, x$r1, useNA = "always")

# Priority, highest first: ui leaner, hm leaner, then r1 itself when it is
# already -1 or 1; everything else stays NA
x$r1_lean <- ifelse(
  !is.na(x$ui_r1l), x$ui_r1l,
  ifelse(
    !is.na(x$hm_r1l), x$hm_r1l,
    ifelse(x$r1 %in% c(-1, 1), x$r1, NA)
  )
)

table(x$hm_r1[x$policy == "hm"], x$r1_lean[x$policy == "hm"], useNA = "always")
table(x$hm_r1l[x$policy == "hm"], x$r1_lean[x$policy == "hm"], useNA = "always")

table(x$ui_r1[x$policy == "ui"], x$r1_lean[x$policy == "ui"], useNA = "always")
table(x$ui_r1l[x$policy == "ui"], x$r1_lean[x$policy == "ui"], useNA = "always")

# Condense r2 and r3: each exists as an hm_ and a ui_ column; the combined
# variable takes the ui_ answer where present and the hm_ answer otherwise

table(x$hm_r2, useNA = "always")
table(x$ui_r2, useNA = "always")

x$r2 <- ifelse(!is.na(x$ui_r2), x$ui_r2, x$hm_r2)

# Check the combined variable against each source column
table(x$hm_r2, x$r2, useNA = "always")
table(x$ui_r2, x$r2, useNA = "always")

## 

table(x$hm_r3, useNA = "always")
table(x$ui_r3, useNA = "always")

x$r3 <- ifelse(!is.na(x$ui_r3), x$ui_r3, x$hm_r3)

table(x$hm_r3, x$r3, useNA = "always")
table(x$ui_r3, x$r3, useNA = "always")

# Condense the policy-specific r4 items and create the leaner version r4_lean

table(x$hm_r4, x$hm_r4l, useNA = "always")
table(x$ui_r4, x$ui_r4l, useNA = "always")

# Use the ui answer where there is one, otherwise the hm answer
x$r4 <- ifelse(!is.na(x$ui_r4), x$ui_r4, x$hm_r4)

table(x$hm_r4, x$r4, useNA = "always")
table(x$ui_r4, x$r4, useNA = "always")

# Priority, highest first: ui leaner, hm leaner, then r4 collapsed to a sign
# (1 or 2 -> -1, 4 or 5 -> 1); any other r4 value stays NA
x$r4_lean <- ifelse(
  !is.na(x$ui_r4l), x$ui_r4l,
  ifelse(
    !is.na(x$hm_r4l), x$hm_r4l,
    ifelse(x$r4 %in% c(1, 2), -1, ifelse(x$r4 %in% c(4, 5), 1, NA))
  )
)

table(x$hm_r4[x$policy == "hm"], x$r4_lean[x$policy == "hm"], useNA = "always")
table(x$hm_r4l[x$policy == "hm"], x$r4_lean[x$policy == "hm"], useNA = "always")

table(x$ui_r4[x$policy == "ui"], x$r4_lean[x$policy == "ui"], useNA = "always")
table(x$ui_r4l[x$policy == "ui"], x$r4_lean[x$policy == "ui"], useNA = "always")

##

# r7 needs no recoding; frequency check only
table(x$r7, useNA = "always")

# Leaner version of r8: the follow-up r8l wins where it was answered;
# otherwise r8 is collapsed to its sign (1 or 2 -> 1, -1 or -2 -> -1)

table(x$r8, useNA = "always")

x$r8_lean <- ifelse(
  !is.na(x$r8l), x$r8l,
  ifelse(x$r8 %in% c(1, 2), 1, ifelse(x$r8 %in% c(-1, -2), -1, NA))
)

table(x$r8, x$r8_lean, useNA = "always")
table(x$r8l, x$r8_lean, useNA = "always")

# Leaner version of r9, built the same way from r9 and r9l

table(x$r9, useNA = "always")

x$r9_lean <- ifelse(
  !is.na(x$r9l), x$r9l,
  ifelse(x$r9 %in% c(1, 2), 1, ifelse(x$r9 %in% c(-1, -2), -1, NA))
)

table(x$r9, x$r9_lean, useNA = "always")
table(x$r9l, x$r9_lean, useNA = "always")

##

# r10 and r11 need no recoding; frequency checks only
table(x$r10, useNA = "always")
table(x$r11, useNA = "always")

# TO DO: CONDENSE WHO RESPONSES

# Condense the policy-specific tu and w items into single variables. In every
# case the ui_ answer is used where present and the hm_ answer otherwise.

x$tu <- ifelse(!is.na(x$ui_tu), x$ui_tu, x$hm_tu)

x$w_1 <- ifelse(!is.na(x$ui_w_1), x$ui_w_1, x$hm_w_1)
x$w_2 <- ifelse(!is.na(x$ui_w_2), x$ui_w_2, x$hm_w_2)
x$w_3 <- ifelse(!is.na(x$ui_w_3), x$ui_w_3, x$hm_w_3)
x$w_4 <- ifelse(!is.na(x$ui_w_4), x$ui_w_4, x$hm_w_4)

# Same rule for the "is this govt?" item (hm_g / ui_g)

x$isgov <- ifelse(!is.na(x$ui_g), x$ui_g, x$hm_g)

# TO DO: IS GOV LEANER

# Condense r5 and r6: each was recorded in a "d" and an "i" column (r5d / r5i,
# r6d / r6i); the combined variable takes the "i" answer where present and
# the "d" answer otherwise

table(x$r5d, x$r5i, useNA = "always")
table(x$resframe, x$r5d, useNA = "always")
table(x$resframe, x$r5i, useNA = "always")

x$r5 <- ifelse(!is.na(x$r5i), x$r5i, x$r5d)

table(x$r5, x$r5d, useNA = "always")
table(x$r5, x$r5i, useNA = "always")

##

table(x$r6d, x$r6i, useNA = "always")
table(x$resframe, x$r6d, useNA = "always")
table(x$resframe, x$r6i, useNA = "always")

x$r6 <- ifelse(!is.na(x$r6i), x$r6i, x$r6d)

table(x$r6, x$r6d, useNA = "always")
table(x$r6, x$r6i, useNA = "always")

# By design r6 received the opposite framing from r5, so resframe2 is
# resframe with "direct" and "indirect" swapped (anything else becomes NA)

x$resframe2 <- ifelse(
  x$resframe == "direct", "indirect",
  ifelse(x$resframe == "indirect", "direct", NA)
)

table(x$resframe, x$resframe2, useNA = "always")

# Compare the r6 columns against the original and the flipped framing

table(x$resframe, x$r6d, useNA = "always")
table(x$resframe, x$r6i, useNA = "always")

table(x$resframe2, x$r6d, useNA = "always")
table(x$resframe2, x$r6i, useNA = "always")

table(x$resframe, x$r6, useNA = "always")
table(x$resframe2, x$r6, useNA = "always")

# Leaner versions of r5 and r6. A leaner follow-up wins where it was answered
# ("i" leaner first, then "d" leaner); otherwise the main answer is collapsed
# to its sign (1 or 2 -> 1, -1 or -2 -> -1) and anything else stays NA.

table(x$r5d, x$r5dl, useNA = "always")
table(x$r5i, x$r5il, useNA = "always")

x$r5_lean <- ifelse(
  !is.na(x$r5il), x$r5il,
  ifelse(
    !is.na(x$r5dl), x$r5dl,
    ifelse(x$r5 %in% c(1, 2), 1, ifelse(x$r5 %in% c(-1, -2), -1, NA))
  )
)

# Check r5_lean against its sources within each framing
table(x$r5d[x$resframe == "direct"], x$r5_lean[x$resframe == "direct"], useNA = "always")
table(x$r5dl[x$resframe == "direct"], x$r5_lean[x$resframe == "direct"], useNA = "always")

table(x$r5i[x$resframe == "indirect"], x$r5_lean[x$resframe == "indirect"], useNA = "always")
table(x$r5il[x$resframe == "indirect"], x$r5_lean[x$resframe == "indirect"], useNA = "always")

##

table(x$r6d, x$r6dl, useNA = "always")
table(x$r6i, x$r6il, useNA = "always")

x$r6_lean <- ifelse(
  !is.na(x$r6il), x$r6il,
  ifelse(
    !is.na(x$r6dl), x$r6dl,
    ifelse(x$r6 %in% c(1, 2), 1, ifelse(x$r6 %in% c(-1, -2), -1, NA))
  )
)

# The r6 "d" columns are checked under resframe == "indirect" and the "i"
# columns under "direct", the reverse of the pairing used for r5
table(x$r6d[x$resframe == "indirect"], x$r6_lean[x$resframe == "indirect"], useNA = "always")
table(x$r6dl[x$resframe == "indirect"], x$r6_lean[x$resframe == "indirect"], useNA = "always")

table(x$r6i[x$resframe == "direct"], x$r6_lean[x$resframe == "direct"], useNA = "always")
table(x$r6il[x$resframe == "direct"], x$r6_lean[x$resframe == "direct"], useNA = "always")

# Strip the numeric suffix from four column names:
# r5tu_4 -> r5tu, r6tu_4 -> r6tu, r5w_1 -> r5w, r6w_1 -> r6w.
# The patterns are anchored at both ends, so only exact matches are renamed.

names(x) <- sub("^(r[56]tu)_4$", "\\1", names(x))
names(x) <- sub("^(r[56]w)_1$", "\\1", names(x))

# Recode party ID into two 0/1 indicators

table(x$partyid, useNA = "always")
table(x$partyid, x$lean, useNA = "always")

# dem = 1 when partyid is 1, or when partyid is 3 and lean is 1.
# %in% never returns NA, so respondents with a missing partyid are coded 0.
x$dem <- as.numeric(x$partyid %in% 1 | (x$partyid %in% 3 & x$lean %in% 1))
table(x$dem, x$partyid, useNA = "always")
table(x$dem, x$lean, useNA = "always")

# rep = 1 when partyid is 2, or when partyid is 3 and lean is 2
x$rep <- as.numeric(x$partyid %in% 2 | (x$partyid %in% 3 & x$lean %in% 2))
table(x$rep, x$partyid, useNA = "always")
table(x$rep, x$lean, useNA = "always")
